import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent so anjuke.com serves the normal HTML page
# instead of blocking the request as an obvious bot/script.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36"
}


def get_each_page_info(link):
    """Scrape one listing page of hf.anjuke.com and print each house's details.

    Parameters
    ----------
    link : str
        URL of a single result page (e.g. ``https://hf.anjuke.com/sale/p1``).

    Returns
    -------
    list[dict]
        One dict per successfully parsed listing (keys: name, price,
        unit_price, num_room, area, floor, year, broker, address, tags).
        Listings with missing fields are skipped instead of crashing.
    """
    # timeout prevents the script from hanging forever on a stalled server
    r = requests.get(link, headers=headers, timeout=10)
    soup = BeautifulSoup(r.text, 'lxml')
    house_list = soup.find_all('li', class_="list-item")
    results = []
    for house in house_list:
        try:
            # Hoist the repeated 'details-item' lookup: the original code
            # re-ran this find() four times per listing.
            details = house.find('div', class_='details-item')
            info = {
                'name': house.find('div', class_='house-title').a.text.strip(),
                'price': house.find('span', class_='price-det').text.strip(),
                'unit_price': house.find('span', class_='unit-price').text.strip(),
                'num_room': details.span.text,
                # contents indices 3/5/7 skip the separator nodes between spans
                'area': details.contents[3].text,
                'floor': details.contents[5].text,
                'year': details.contents[7].text,
                'broker': house.find('span', class_='broker-name').text,
                'address': house.find('span', class_='comm-address').text.strip(),
                'tags': [i.text for i in house.find_all('span', class_='item-tags')],
            }
        except (AttributeError, IndexError):
            # A listing with a missing/ad-swapped field: skip it rather than
            # aborting the whole page (original crashed here).
            continue
        results.append(info)
        print(info['name'], info['price'], info['unit_price'], info['num_room'],
              info['area'], info['floor'], info['year'], info['broker'],
              info['address'], info['tags'])
    return results


def main():
    """Scrape result pages 1-4 of the Hefei anjuke.com sale listings."""
    for page in range(1, 5):
        link = f'https://hf.anjuke.com/sale/p{page}'
        get_each_page_info(link)


# Guard the entry point so importing this module does not fire
# network requests as a side effect.
if __name__ == "__main__":
    main()